Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
library(flexdashboard)
library(ggplot2)
library(plotly)
Attaching package: ‘plotly’
The following object is masked from ‘package:ggplot2’:
last_plot
The following object is masked from ‘package:stats’:
filter
The following object is masked from ‘package:graphics’:
layout
library(tidyverse)
── Attaching core tidyverse packages ────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.4
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ lubridate 1.9.3 ✔ tibble 3.2.1
✔ purrr 1.0.2 ✔ tidyr 1.3.0── Conflicts ──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks plotly::filter(), stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(RMySQL)
library(DBI)
library(tidytext)
library(wordcloud)
Loading required package: RColorBrewer
# Connect to a SQLite in-memory database
database <- dbConnect(MySQL(),
dbname = "wdl_database",
host = "localhost",
user = "user",
password = "password",
port = 3306)
dfData <- dbGetQuery(database, "SELECT * FROM discord_messages")
Warning: unrecognized MySQL field type 7 in column 6 imported as character
SELECT
UserId,
Name,
COUNT(MessageId) AS TotalMessages,
DATE_FORMAT(Timestamp, '%Y-%m') AS Month
FROM
wdl_database.discord_messages
WHERE
Name IN ('theycallmeq', 'jbuwu', 'snozledozle', 'thefyreprophecy', 'joppertje','lykozen','coeus._','coeus7680')
GROUP BY
UserId, Name, DATE_FORMAT(Timestamp, '%Y-%m')
ORDER BY
DATE_FORMAT(Timestamp, '%Y-%m');
result_messagespermonth<- dbGetQuery(database, "
SELECT
UserId,
Name,
COUNT(MessageId) AS TotalMessages,
DATE_FORMAT(Timestamp, '%Y-%m') AS Month
FROM
wdl_database.discord_messages
WHERE
Name IN ('theycallmeq', 'jbuwu', 'snozledozle', 'thefyreprophecy', 'joppertje','lykozen','coeus._','coeus7680')
GROUP BY
UserId, Name, DATE_FORMAT(Timestamp, '%Y-%m')
ORDER BY
DATE_FORMAT(Timestamp, '%Y-%m');
")
Warning: The working directory was changed to /home/tariq inside a notebook chunk. The working directory will be reset when the chunk is finished running. Use the knitr root.dir option in the setup chunk to change the working directory for notebook chunks.
plot_messagepermonth <- ggplot(result_messagespermonth, aes(x = Month, y = TotalMessages, fill = Name)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, hjust = 1)) + # Rotate x-axis labels if needed
labs(title = "Monthly Messages per User",
x = "Month",
y = "Total Messages")
ggplotly(plot_messagepermonth)
SELECT COUNT(*) as TotalMessages
FROM wdl_database.discord_messages;
result_totalmessages<- dbGetQuery(database, "
SELECT COUNT(*) as TotalMessages
FROM wdl_database.discord_messages;
")
SELECT Name, COUNT(distinct Id) as MostMessages
FROM wdl_database.discord_messages
GROUP BY Name
ORDER BY MostMessages DESC
LIMIT 1;
result_mostmessages<- dbGetQuery(database, "
SELECT Name, COUNT(distinct Id) as MostMessages
FROM wdl_database.discord_messages
GROUP BY Name
ORDER BY MostMessages DESC
LIMIT 1;
")
SELECT *
FROM wdl_database.discord_messages
WHERE Content LIKE '%http%';
SELECT PremiumType,
GROUP_CONCAT(DISTINCT Name ORDER BY Name SEPARATOR ', ') AS Users,
COUNT(DISTINCT Name) AS UniqueUserCount
FROM wdl_database.discord_messages
GROUP BY PremiumType;
result_nitrodistrobutionperusers<- dbGetQuery(database, "
SELECT PremiumType,
GROUP_CONCAT(DISTINCT Name ORDER BY Name SEPARATOR ', ') AS Users,
COUNT(DISTINCT Name) AS UniqueUserCount
FROM wdl_database.discord_messages
GROUP BY PremiumType;
")
plot_ly(result_nitrodistrobutionperusers, labels = ~PremiumType, values = ~UniqueUserCount, type = 'pie',
textinfo = 'label+percent', insidetextorientation = 'radial') %>%
layout(title = 'Distribution of Premium Types Among Users')
NA
SELECT
Name,
COUNT(MessageId) AS TotalMessages
FROM
wdl_database.discord_messages
WHERE
Name IN ('theycallmeq', 'jbuwu', 'snozledozle', 'thefyreprophecy', 'joppertje','lykozen','coeus._','coeus7680')
GROUP BY
Name
HAVING
COUNT(MessageId) >= 100;
result_totalmessagesperuser<- dbGetQuery(database, "
SELECT
Name,
COUNT(MessageId) AS TotalMessages
FROM
wdl_database.discord_messages
WHERE
Name IN ('theycallmeq', 'jbuwu', 'snozledozle', 'thefyreprophecy', 'joppertje','lykozen','coeus._','coeus7680')
GROUP BY
Name
HAVING
COUNT(MessageId) >= 100;
")
plot_totalmessagesperuser <- ggplot(result_totalmessagesperuser, aes(x = Name, y = TotalMessages, fill = Name)) +
geom_bar(stat = "identity") +
theme_minimal() +
labs(title = "Total Messages per User",
x = "User Name",
y = "Total Messages") +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Rotate x-axis labels for readability
# Convert to an interactive plotly object
ggplotly(plot_totalmessagesperuser)
word_frequencies <- dfData %>%
unnest_tokens(word, Content) %>% # Split text into words
count(word, sort = TRUE) # Count and sort by frequency
top_words <- head(word_frequencies, 50)
print(top_words)
# Assuming top_words is already created and contains the top 50 words
plot_topwords <- ggplot(top_words, aes(x = reorder(word, n), y = n, fill = word)) +
geom_bar(stat = "identity") +
coord_flip() + # Flips the coordinates to make the plot horizontal
scale_fill_viridis_d() + # Use the viridis color palette for discrete data
labs(title = "Top 50 Word Frequencies",
x = "Words",
y = "Frequency") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 90, hjust = 1), # Adjust x-axis labels for readability
legend.position = "none") # Hide the legend
# Convert to an interactive plotly object
ggplotly(plot_topwords)
NA